{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import os\n",
    "import pickle\n",
    "import requests\n",
    "import urllib\n",
    "from itertools import groupby\n",
    "from operator import itemgetter\n",
    "from typing import Dict, Tuple\n",
    "from multiprocessing import Pool\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sqlalchemy as sa\n",
    "from scipy.sparse import save_npz, load_npz, csr_matrix\n",
    "from tqdm import tqdm\n",
    "\n",
    "os.environ['MKL_NUM_THREADS'] = '1'\n",
    "DT = datetime.datetime.now().strftime('%Y-%m-%d')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Загружаем csv\n",
    "\n",
    "`user_id` - результат рандома\n",
    "`item_id` - результат рандома"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1975696, 4)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>show_timestamp</th>\n",
       "      <th>show_duration</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>912948920</td>\n",
       "      <td>1587935070</td>\n",
       "      <td>1119307</td>\n",
       "      <td>323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1882728205</td>\n",
       "      <td>1466874188</td>\n",
       "      <td>1115796</td>\n",
       "      <td>1428</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>382105433</td>\n",
       "      <td>276839040</td>\n",
       "      <td>1116585</td>\n",
       "      <td>921</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      user_id     item_id  show_timestamp  show_duration\n",
       "0   912948920  1587935070         1119307            323\n",
       "1  1882728205  1466874188         1115796           1428\n",
       "2   382105433   276839040         1116585            921"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_item_views_df = pd.read_csv('data/user_item_views.zip', compression='zip')\n",
    "print(user_item_views_df.shape)\n",
    "user_item_views_df.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Для трансформации в csr создаём индексы"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Индекс создан: 168756 строк 9991 столбцов\n"
     ]
    }
   ],
   "source": [
    "unique_users = user_item_views_df.user_id.unique()\n",
    "unique_items = user_item_views_df.item_id.unique()\n",
    "item_to_id = {j: i for i, j in enumerate(unique_items)}\n",
    "id_to_item = {j: i for i, j in item_to_id.items()}\n",
    "user_to_id = {j: i for i, j in enumerate(unique_users)}\n",
    "print('Индекс создан: %d строк %d столбцов' % (len(user_to_id), len(item_to_id)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Трансформация в csr\n",
    "\n",
    "Для каждого пользователя оставляем top-20 последних просмотров"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Данные сохранены в train_set_2020-11-26.npz\n",
      "CPU times: user 5.49 s, sys: 325 ms, total: 5.82 s\n",
      "Wall time: 5.83 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "HISTORY_TOP = 20\n",
    "user_item_views_df['rank'] = (\n",
    "    user_item_views_df\n",
    "    .groupby(by=['user_id'])['show_timestamp']\n",
    "    .rank(method='first', ascending=False)\n",
    ")\n",
    "ui_slim_df = user_item_views_df[user_item_views_df['rank'] < HISTORY_TOP][['user_id', 'item_id']]\n",
    "num_rows = len(user_to_id)\n",
    "num_cols = len(item_to_id)\n",
    "entries = np.ones(ui_slim_df.shape[0])\n",
    "rows = tuple(user_to_id[i] for i in ui_slim_df.user_id.values)\n",
    "cols = tuple(item_to_id[i] for i in ui_slim_df.item_id.values)\n",
    "\n",
    "train_set_csr = csr_matrix(\n",
    "    (entries, (rows, cols)),\n",
    "    shape=(num_rows, num_cols),\n",
    "    dtype=np.float32\n",
    ")\n",
    "train_set_csr\n",
    "save_npz(f'train_set_{DT}.npz', train_set_csr)\n",
    "print('Данные сохранены в %s' % f'train_set_{DT}.npz')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Обучение модели"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting implicit==0.4.2\n",
      "  Downloading implicit-0.4.2.tar.gz (1.1 MB)\n",
      "\u001b[K     |████████████████████████████████| 1.1 MB 251 kB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from implicit==0.4.2) (1.19.4)\n",
      "Requirement already satisfied: scipy>=0.16 in /opt/conda/lib/python3.8/site-packages (from implicit==0.4.2) (1.5.3)\n",
      "Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.8/site-packages (from implicit==0.4.2) (4.51.0)\n",
      "Building wheels for collected packages: implicit\n",
      "  Building wheel for implicit (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for implicit: filename=implicit-0.4.2-cp38-cp38-linux_x86_64.whl size=4756868 sha256=65219971a60e1c0a08e849073d851c7b30d6c4da75d19f1a188380c7c9ff0f76\n",
      "  Stored in directory: /home/jovyan/.cache/pip/wheels/97/dd/5f/df702090a221c1b1cc4683950b6d086eeee98d37a547f20f8f\n",
      "Successfully built implicit\n",
      "Installing collected packages: implicit\n",
      "Successfully installed implicit-0.4.2\n"
     ]
    }
   ],
   "source": [
    "!pip install implicit==0.4.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "35d51b5a7bc443ffb55715883f3ff8e3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "from implicit.als import AlternatingLeastSquares\n",
    "\n",
    "implict_als_params = {'factors': 4, 'iterations': 1}\n",
    "model = AlternatingLeastSquares(**implict_als_params)\n",
    "# транспонируем обязательно!\n",
    "model.fit(train_set_csr.T.tocsr())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Пример работы модели"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_id</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6820</th>\n",
       "      <td>1622781290</td>\n",
       "      <td>Гладиатор</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10323</th>\n",
       "      <td>1666115004</td>\n",
       "      <td>Илон Маск: Настоящий железный человек</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12414</th>\n",
       "      <td>166044284</td>\n",
       "      <td>Карибские острова 3D: Погружение с акулами</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15345</th>\n",
       "      <td>1593139110</td>\n",
       "      <td>Тачки 3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19707</th>\n",
       "      <td>1700203196</td>\n",
       "      <td>Разрушитель</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28254</th>\n",
       "      <td>1507387779</td>\n",
       "      <td>Вольт</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29777</th>\n",
       "      <td>160084426</td>\n",
       "      <td>Темные воды</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          item_id                                       title\n",
       "6820   1622781290                                   Гладиатор\n",
       "10323  1666115004       Илон Маск: Настоящий железный человек\n",
       "12414   166044284  Карибские острова 3D: Погружение с акулами\n",
       "15345  1593139110                                     Тачки 3\n",
       "19707  1700203196                                 Разрушитель\n",
       "28254  1507387779                                       Вольт\n",
       "29777   160084426                                 Темные воды"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def id_to_content_df(ids: np.array, content_df: pd.DataFrame, id_to_item):\n",
    "    items = tuple(id_to_item[i] for i in ids)\n",
    "    result_df = content_catalog[\n",
    "        content_catalog.item_id.isin(items)\n",
    "    ]\n",
    "    return result_df\n",
    "\n",
    "content_catalog = pd.read_csv('data/content_catalog.zip', compression='zip')\n",
    "\n",
    "random_history = train_set_csr[np.random.randint(low=0, high=train_set_csr.shape[0])]\n",
    "id_to_content_df(random_history.nonzero()[1], content_catalog, id_to_item)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Проверим, что рекоммендует модель"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_id</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4098</th>\n",
       "      <td>984316348</td>\n",
       "      <td>Гарри Поттер и Принц-полукровка</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10630</th>\n",
       "      <td>784227936</td>\n",
       "      <td>Эверест</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12028</th>\n",
       "      <td>125673055</td>\n",
       "      <td>Камуфляж и шпионаж</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13380</th>\n",
       "      <td>1757140863</td>\n",
       "      <td>Корпорация монстров</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25068</th>\n",
       "      <td>1055805812</td>\n",
       "      <td>Тачки</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26561</th>\n",
       "      <td>1729092206</td>\n",
       "      <td>Гарри Поттер и узник Азкабана</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27442</th>\n",
       "      <td>157249585</td>\n",
       "      <td>Капоне. Лицо со шрамом</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28064</th>\n",
       "      <td>1129147321</td>\n",
       "      <td>Волк с Уолл-стрит</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28796</th>\n",
       "      <td>538560695</td>\n",
       "      <td>Разговорник</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29850</th>\n",
       "      <td>1858036661</td>\n",
       "      <td>Фиксики против Кработов</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          item_id                            title\n",
       "4098    984316348  Гарри Поттер и Принц-полукровка\n",
       "10630   784227936                          Эверест\n",
       "12028   125673055               Камуфляж и шпионаж\n",
       "13380  1757140863              Корпорация монстров\n",
       "25068  1055805812                            Тачки\n",
       "26561  1729092206    Гарри Поттер и узник Азкабана\n",
       "27442   157249585           Капоне. Лицо со шрамом\n",
       "28064  1129147321                Волк с Уолл-стрит\n",
       "28796   538560695                      Разговорник\n",
       "29850  1858036661          Фиксики против Кработов"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommends = model.recommend(\n",
    "            userid = 0,\n",
    "            user_items=random_history,\n",
    "            N=10,\n",
    "            filter_already_liked_items=True,\n",
    "            recalculate_user=True\n",
    ")\n",
    "id_to_content_df([rec[0] for rec in recommends], content_catalog, id_to_item)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Валидация модели"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13163 13163\n"
     ]
    }
   ],
   "source": [
    "with open('data/ground_truth_dataset.pkl', 'rb') as f:\n",
    "    ground_truth_dataset = pickle.load(f)\n",
    "with open('data/test_dataset.pkl', 'rb') as f:\n",
    "    test_dataset = pickle.load(f)\n",
    "print(len(test_dataset), len(ground_truth_dataset))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13163\n"
     ]
    }
   ],
   "source": [
    "def get_als_action_history_vector(item_to_id: Dict[int, int], action_history, binary=True) -> np.ndarray:\n",
    "    \"\"\"Получить историю действий для ALS\n",
    "\n",
    "    :param item_to_id: справочник контента ALS\n",
    "    :return:\n",
    "    \"\"\"\n",
    "    als_action_history_vector = np.zeros(len(item_to_id), dtype=int)\n",
    "    for iid, item_attr in action_history.items():\n",
    "        if iid in item_to_id.keys():\n",
    "            if binary:\n",
    "                als_action_history_vector[item_to_id[iid]] = 1\n",
    "            else:\n",
    "                als_action_history_vector[item_to_id[iid]] = item_attr\n",
    "    return als_action_history_vector\n",
    "\n",
    "def vectorize_action_history(action_history):\n",
    "    res = get_als_action_history_vector(item_to_id, action_history)\n",
    "    return res\n",
    "\n",
    "with Pool(5) as p:\n",
    "    test_dataset_vectors = p.map(vectorize_action_history, test_dataset)\n",
    "    ground_truth_dataset_vectors = p.map(vectorize_action_history, ground_truth_dataset)\n",
    "print(len(test_dataset_vectors))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Готовим данные для мультипроцессинга - объединяем в один массив историю пользователя и валидационные просмотры"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_valid_pairs = []\n",
    "for test_user_id in range(len(test_dataset_vectors)):\n",
    "    train_valid_pairs.append((\n",
    "        csr_matrix(test_dataset_vectors[test_user_id]),\n",
    "        ground_truth_dataset_vectors[test_user_id].nonzero()[0]\n",
    "    ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.17412443971738964\n",
      "CPU times: user 1.08 s, sys: 213 ms, total: 1.3 s\n",
      "Wall time: 41.1 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "N = 40\n",
    "testing_model = model\n",
    "\n",
    "def top_n_recommends(watch_history):\n",
    "    top_n_result = testing_model.recommend(\n",
    "            userid = 0,\n",
    "            user_items=watch_history[0],\n",
    "            N=N,\n",
    "            filter_already_liked_items=True,\n",
    "            recalculate_user=True\n",
    "    )\n",
    "    hit = 0\n",
    "    if len(watch_history[1]) > 0 and np.intersect1d(watch_history[1], top_n_result).size > 0:\n",
    "        hit = 1\n",
    "    return hit\n",
    "\n",
    "with Pool(5) as p:\n",
    "    hits = p.map(top_n_recommends, train_valid_pairs)\n",
    "print(sum(hits)/len(hits))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Бейзлайны\n",
    "\n",
    "top 100 популярного"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.18438046038137204\n",
      "CPU times: user 534 ms, sys: 274 ms, total: 807 ms\n",
      "Wall time: 978 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "N = 40\n",
    "content_popularity = np.asarray(train_set_csr.sum(axis=0)).reshape(-1)\n",
    "top_100_popular_items = np.argsort(-content_popularity)[:100]\n",
    "\n",
    "def top_n_recommends(watch_history):\n",
    "    top_n_result = top_100_popular_items[:N]\n",
    "    hit = 0\n",
    "    if len(watch_history[1]) > 0 and np.intersect1d(watch_history[1], top_n_result).size > 0:\n",
    "        hit = 1\n",
    "    return hit\n",
    "\n",
    "with Pool(5) as p:\n",
    "    hits = p.map(top_n_recommends, train_valid_pairs)\n",
    "print(sum(hits)/len(hits))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Рандом"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.008432728101496619\n",
      "CPU times: user 544 ms, sys: 228 ms, total: 772 ms\n",
      "Wall time: 1.04 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "N = 50\n",
    "content_popularity = np.asarray(train_set_csr.sum(axis=0)).reshape(-1)\n",
    "all_content = np.array(list(id_to_item.keys()))\n",
    "\n",
    "def top_n_recommends(watch_history):\n",
    "    top_n_result = np.random.choice(all_content, size=N, replace=True)\n",
    "    hit = 0\n",
    "    if len(watch_history[1]) > 0 and np.intersect1d(watch_history[1], top_n_result).size > 0:\n",
    "        hit = 1\n",
    "    return hit\n",
    "\n",
    "with Pool(5) as p:\n",
    "    hits = p.map(top_n_recommends, train_valid_pairs)\n",
    "print(sum(hits)/len(hits))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Пример с обучением модели"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "95379df31e414f0ea998787c54ec024d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=30.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "implict_als_params = {'factors': 20, 'iterations': 30}\n",
    "model = AlternatingLeastSquares(**implict_als_params)\n",
    "# транспонируем обязательно!\n",
    "model.fit(train_set_csr.T.tocsr())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.25845172073235584\n",
      "CPU times: user 1.19 s, sys: 157 ms, total: 1.35 s\n",
      "Wall time: 52.5 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "N = 50\n",
    "testing_model = model\n",
    "\n",
    "def top_n_recommends(watch_history):\n",
    "    top_n_result = testing_model.recommend(\n",
    "            userid = 0,\n",
    "            user_items=watch_history[0],\n",
    "            N=N,\n",
    "            filter_already_liked_items=True,\n",
    "            recalculate_user=True\n",
    "    )\n",
    "    hit = 0\n",
    "    if len(watch_history[1]) > 0 and np.intersect1d(watch_history[1], top_n_result).size > 0:\n",
    "        hit = 1\n",
    "    return hit\n",
    "\n",
    "with Pool(5) as p:\n",
    "    hits = p.map(top_n_recommends, train_valid_pairs)\n",
    "print(sum(hits)/len(hits))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Проверяем рекомендации на обученной модели"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_id</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4688</th>\n",
       "      <td>446911865</td>\n",
       "      <td>Фиксики: Большой секрет</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7185</th>\n",
       "      <td>465257123</td>\n",
       "      <td>Основатель</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8382</th>\n",
       "      <td>866816808</td>\n",
       "      <td>Тачки 2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13380</th>\n",
       "      <td>1757140863</td>\n",
       "      <td>Корпорация монстров</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16205</th>\n",
       "      <td>1687588185</td>\n",
       "      <td>Щенячий патруль: Мегащенки</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25068</th>\n",
       "      <td>1055805812</td>\n",
       "      <td>Тачки</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26592</th>\n",
       "      <td>821523592</td>\n",
       "      <td>В поисках Немо</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27811</th>\n",
       "      <td>442883224</td>\n",
       "      <td>Университет монстров</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28064</th>\n",
       "      <td>1129147321</td>\n",
       "      <td>Волк с Уолл-стрит</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29850</th>\n",
       "      <td>1858036661</td>\n",
       "      <td>Фиксики против Кработов</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          item_id                       title\n",
       "4688    446911865     Фиксики: Большой секрет\n",
       "7185    465257123                  Основатель\n",
       "8382    866816808                     Тачки 2\n",
       "13380  1757140863         Корпорация монстров\n",
       "16205  1687588185  Щенячий патруль: Мегащенки\n",
       "25068  1055805812                       Тачки\n",
       "26592   821523592              В поисках Немо\n",
       "27811   442883224        Университет монстров\n",
       "28064  1129147321           Волк с Уолл-стрит\n",
       "29850  1858036661     Фиксики против Кработов"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommends = model.recommend(\n",
    "            userid = 0,\n",
    "            user_items=random_history,\n",
    "            N=10,\n",
    "            filter_already_liked_items=True,\n",
    "            recalculate_user=True\n",
    ")\n",
    "id_to_content_df([rec[0] for rec in recommends], content_catalog, id_to_item)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Загружаем JSON\n",
    "\n",
    "Нужно распаковать архив и подготовить его для загрузки в Mongo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tarfile\n",
    "with tarfile.open('data/json_views.tar.gz', 'r') as json_tar:\n",
    "    json_tar.extractall('data')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
